#############################################################################################################################
# Comparison of MrP and MrsP with Swiss votes
# November 2016
# lleemann@gmail.com
##############################################################################################################################


rm(list=ls())

# read in packages
library(foreign)
library(lme4)
library(blme)
library(arm)
library(xtable)


# set the working directory
setwd(".../Final replication file")

# DATA
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# official data (referendums)
true.yesShare <- read.csv("BfS data.csv")

# surevy data
voxit.data <- read.dta("Voxit.dta", convert.factors=FALSE)

# official data (census)
block <- read.csv("census final BfS 11 2012 2.csv", header = TRUE)


# Some information needed for Appendix A.1 (correlations of level 1 variables)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~



round(cor(as.matrix(cbind(voxit.data$educ,voxit.data$age_group,voxit.data$sexe)), use="complete.obs"),2)

vote.ids <- names(table(voxit.data$AbstNrBFS))
vote.ids <- as.numeric(vote.ids)


corrBlock <- array(NA,c(3,3,length(vote.ids)))
for (z in vote.ids){
	DATAAA <- voxit.data[voxit.data$AbstNrBFS==z,]
	corrBlock[,,which(vote.ids==z)] <- 	round(cor(as.matrix(cbind(DATAAA$educ,DATAAA$age_group,DATAAA$sexe)), use="complete.obs"),2)
	print(z)
	}

mean.corr <- apply(corrBlock, c(1,2), mean)
xtable(mean.corr, digits=2)
sort(c(corrBlock ))



# MODELS
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# response models ---- 3 individ. predictor: GENDER, EDUCATION, AGE
# MRP
model.1 <- y ~ x2.3 + x2.4 + (1|x1.gender) + (1|x1.education) + (1|x1.age) + (1|canton)  + (1|region)
model.2 <- y ~ x2.3 + x2.4 +  x2.5 + x2.6 + x2.7 + x2.8  + x2.10 + (1|x1.gender) + (1|x1.education) + (1|x1.age) + (1|canton) + (1|region)
models <- c(model.1, model.2)

# MrsP
model.L1 <- y ~ x2.3 + x2.4 + (1|x1.gender) + (1|x1.age) + (1|canton) +  (1|x1.education)  + (1|region)
model.L2 <- y ~ x2.3 + x2.4 +  (1|x1.partyL) +(1|x1.gender) + (1|x1.age) + (1|canton) + (1|x1.education) + (1|region)
models.m <- c(model.L1, model.L2)



# ANALYSIS
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

prediction.mrp <- array(NA, c(3, 26, length(vote.ids)))
prediction.mrsp <- array(NA, c(3, 26, length(vote.ids)))
N.sample.dis <- rep(NA, length(vote.ids))
pred.dis <- matrix(NA, 26, length(vote.ids))

N.sample.mrpj <- rep(NA, length(vote.ids)) 	# joint distirbution "j"
N.sample.mrps <- rep(NA, length(vote.ids))	# synthetic joint "s"



start.time <- Sys.time()
for (z in vote.ids){
	data.run <- voxit.data[voxit.data$AbstNrBFS==z,]
	print(z)
	official.results <- as.numeric(true.yesShare[true.yesShare[,27]==z,1:26])
	prediction.mrp[3, ,which(vote.ids==z)] <- official.results 	# 3rd element of array carries true outcome
	prediction.mrsp[3, ,which(vote.ids==z)] <- official.results	# 3rd element of array carries true outcome

	data.run <- voxit.data[voxit.data$AbstNrBFS==z,]
	raw.out <- data.run[,c(1:2)] 	# getting survey yes-vote and canton
	raw.out <- na.omit(raw.out)		# dropping observations which did not respond whether they voted yes or not
	N.sample.dis[which(vote.ids==z)] <- dim(raw.out)[1]
	for (p in 1:26){
		nobs.p <- length(which(raw.out[,2]==p))		# number of obs in canton p
		www <- mean(raw.out[raw.out[,2]==p,1])
		ifelse(nobs.p!=0, pred.dis[p,which(vote.ids==z)] <- www, pred.dis[p,which(vote.ids==z)] <- mean(raw.out[,1]))
	}
	
	data.run <- na.omit(data.run[,-c(27:36)]) # only necessary for the money version data - check
	y <- data.run$decx
	canton <- data.run$canton
	region <- data.run$region
	x2.3 <- data.run$german_share
	x2.4 <- data.run$romkath_share
	x2.5 <- data.run$partyL1s			
	x2.6 <- data.run$partyL2s			
	x2.7<- data.run$partyL3s
	x2.8 <- data.run$partyL4s
	x2.9 <- data.run$partyL5s
	x2.10 <- data.run$partyL6s
	x1.gender <- data.run$sexe    # 1=man, 2=woman
	x1.education <- data.run$educ
	x1.age <- data.run$age_group
	x1.partyL <- data.run$partyL
	
	# Use survey data to learn about unknown true joint distribution
	# read-out party frequency by ideal type:
		party.ideal <- matrix(NA,48,7)
		n <- 0
		for (k in 1:6){
			for (l in 1:4){
				for (m in 1:2){
						n <- n + 1
						party.pull.out <- c(x1.partyL[x1.education==k & x1.age==l & x1.gender==m])
						party.ideal[n,1] <- length(which(party.pull.out==1))	
						party.ideal[n,2] <- length(which(party.pull.out==2))	
						party.ideal[n,3] <- length(which(party.pull.out==3))	
						party.ideal[n,4] <- length(which(party.pull.out==4))	
						party.ideal[n,5] <- length(which(party.pull.out==5))	
						party.ideal[n,6] <- length(which(party.pull.out==6))	
						party.ideal[n,7] <- length(which(party.pull.out==7))	
				}
			}
		}
	# last couple of lines generate "party.ideal" which reflects our best knowledge 
	# about the types of people and their party preferences based on survey data

	###
	# Running two models for both MrP and MrsP
	for (w in 1:2){
		modeL <- models[[w]]
		####	
		# MrP
		if(z!=4800) model.r <- glmer(modeL, family=binomial(probit), control = glmerControl(optimizer = "bobyqa")) 
		if(z==4800) model.r <- bglmer(modeL, family=binomial(probit), cov.prior = "invwishart", control = glmerControl(optimizer = "bobyqa")) 
		if(w==2) N.sample.mrpj[which(vote.ids==z)] <- dim(model.r@frame)[1]		
			## do prediction based on this response model
			source("source_MrP_CH.R")  
			prediction.mrp[w,,which(vote.ids==z)] <- prediction

		####
		# MrsP 
		model.mrsp <- models.m[[w]]
		# vote 4800 needs bglmer for convergence reasons.
		if(z!=4800 & z!=5340) model.r <- glmer(model.mrsp, family=binomial(probit), control = glmerControl(optimizer = "bobyqa"))
		if(z==4800 | z==5340) model.r <- bglmer(model.mrsp, family=binomial(probit), cov.prior = "invwishart",  control = glmerControl(optimizer = "bobyqa"))
		if(w==2) N.sample.mrps[which(vote.ids==z)] <-  dim(model.r@frame)[1]	
			source("source_MrsP_CH_adjustedsynthetic.R") 
			#source("source_MRmP_v1.R")  
			prediction.mrsp[w,,which(vote.ids==z)] <- predictionLMRP
			warn = warnings()
			if (!is.null(warn)) {
			  print(warn)
			  print(w)
			}
		}
}


end.time <- Sys.time()
cat(paste("Job started at:",start.time))
cat(paste("Job started at:",end.time))
dur <- end.time-start.time
cat(paste("Job took:",dur))



# Generating Absolute Error
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

pred.error.mrp <- array(NA,dim=c(3,26,186))
for (i in 1:186){				# for MRP
	pred.error.mrp[3,,i] <- as.matrix(prediction.mrp[3,,i]/100)
	for (j in 1:2){
		pred.error.mrp[j,,i] <- as.matrix(prediction.mrp[j,,i] -pred.error.mrp[3,,i])		
	}
}

pred.error.mrsp <- array(NA,dim=c(3,26,186))
for (i in 1:186){				# for MRmP
	pred.error.mrsp[3,,i] <- as.matrix(prediction.mrsp[3,,i]/100)
	for (j in 1:2){
		pred.error.mrsp[j,,i] <- as.matrix(prediction.mrsp[j,,i] -pred.error.mrsp[3,,i])		
	}
}


prediction.dis <- pred.dis
pred.err.dis <- prediction.dis - pred.error.mrp[3,,]


#  A B S O L U T E   error 
sq.pred.error.mrp1 <- abs(pred.error.mrp[1,,])
sq.pred.error.mrp2 <- abs(pred.error.mrp[2,,])
sq.pred.error.mrsp1 <- abs(pred.error.mrsp[1,,])
sq.pred.error.mrsp2 <- abs(pred.error.mrsp[2,,])
sq.pred.err.dis <- abs(pred.err.dis)


# Plot 1
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

### first plot

# vote by vote - create order
mse <- cbind(colMeans(sq.pred.error.mrp1),colMeans(sq.pred.error.mrsp1), colMeans(sq.pred.err.dis))
mse.order <- mse[order(mse[,2]+mse[,1]),]
mse.order <- cbind(mse.order,c(1:186))
#

png("Figure02.png", width = 1200, height = 500, points=16)
#par(family="CMU Serif", mfrow=c(1,2), mai=c(.8,1.4,.1,1))
par(mfrow=c(1,2), mai=c(.8,1.4,.1,1))
# 1st plot
plot(mse.order[,4], mse.order[,2], pch=1, col="white", ylim=c(-0.01,0.26), xlim=c(0,185), xlab="186 Swiss Referendums", ylab="Mean Absolute Error", main="", cex.lab=1.5)
rect(xleft=-9,ybottom=-0.0033,199,0.07, col=rgb(190,190,190,50,maxColorValue=255), border=NA)
	# lines
	abline(h=c(0,0.05,0.1,0.15,0.2,0.25,0.3), lty=2, col="grey78", lwd=0.5)
	abline(v=c(0,25,50,75,100,125,150, 175), lty=2, col="grey78", lwd=0.5)
	abline(h=0, col="gray")
	# points
	points(mse.order[,4], mse.order[,2], pch=20, col=rgb(0,0,255,150, maxColorValue=255),cex=0.923, lwd=1)
	points(mse.order[,4], mse.order[,1], pch=0, col=rgb(255,0,0,150, maxColorValue=255), cex=1.3, lwd=1.5)
	points(lowess(mse.order[,4], mse.order[,3],f=1/6), pch=19, col=rgb(0, 0, 0, 80, maxColorValue=255), type="l", lwd=3, lty=2)
	# legend
	legend(2,0.255,legend=c("MrP", "MrsP", "Disaggregation"), pch=c(0, 20, 15), col=c(rgb(255,0,0,150, maxColorValue=255),rgb(0,0,255,150, maxColorValue=255),rgb(0, 0, 0, 80, maxColorValue=255)), bty="n",lwd=c(2,2,3),lty=c(NA,NA,2), pt.cex=c(1.2,1,NA), cex=1.2)
# Zoom - In
xpoly <- c(195,235,235,195)
ypoly <- c(-0.00333,-0.01,0.2,0.07)
arrows(xpoly[1],ypoly[1],xpoly[2],ypoly[2], col="black", lty=2,xpd=NA)
arrows(xpoly[4],ypoly[4],xpoly[3],ypoly[3], col="black", lty=2,xpd=NA)
polygon(xpoly,ypoly, col=rgb(190,190,190,50,maxColorValue=255), xpd=NA, border=NA)

# 2nd plot
plot(mse.order[,4], mse.order[,2], pch=1, col="white", ylim=c(-0.0033,0.07), xlim=c(0,185), xlab="186 Swiss Referendums", ylab="Mean Absolute Error", main="", cex.lab=1.5)
	# lines
	abline(h=c(0,0.012,0.024,0.036,0.048,0.060,0.072), lty=2, col="grey78", lwd=0.5)
	abline(v=c(0,25,50,75,100,125,150, 175), lty=2, col="grey78", lwd=0.5)
	abline(h=0, col="gray")
	# points
	points(mse.order[,4], mse.order[,2], pch=20, col=rgb(0,0,255,150, maxColorValue=255),cex=0.923, lwd=2)
	points(mse.order[,4], mse.order[,1], pch=0, col=rgb(255,0,0,150, maxColorValue=255),cex=0.923, lwd=1.5)
dev.off()	

# Plot 2: Analyzing Right Wing Question with SVP party ID
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


pred.mrp.marginal1 <- prediction.mrsp[2,,]
pred.mrp.joint.l2svp1 <- prediction.mrp[2,,]
pred.mrp.joint.1 <- prediction.mrp[1,,]

Title <- c("Municipal Town Hall Approval \n of Naturalization Decisions", "Limitation of Government’s Right\n to Communicate on Referendums", "Referendum Against an\n Increase of the VAT", "Ban Against the \n Construction of Minarets")

png("Figure03.png", width = 1200, height = 900, points=18)
#par(family="CMU Serif",mfrow=c(2,2), mar=c(5.1, 5.1, 4.1, 2.1))
par(mfrow=c(2,2), mar=c(5.1, 5.1, 4.1, 2.1))
for (i in c(165,166,176,180)){
	table(voxit.data$Vorlage[voxit.data$AbstNrBFS==vote.ids[i]])
meanDeviation.marg1 <- mean(unlist(abs(pred.mrp.marginal1[,i] - true.yesShare[i,-27]/100)^2))
meanDeviation.joinL21 <- mean(unlist(abs(pred.mrp.joint.l2svp1[,i] - true.yesShare[i,-27]/100)^2))
meanDeviation.joinLempty <- mean(unlist(abs(pred.mrp.joint.1[,i] - true.yesShare[i,-27]/100)^2))

	plot(pred.mrp.marginal1[,i], true.yesShare[i,-27]/100,  xlim=c(0,1), ylim=c(0,1), col="blue",pch=19, xlab="Predictions", ylab="Vote Outcome", main=paste(Title[which(i==c(165,166,176,180))]), cex.main=1.8, cex.lab=1.5, cex.axis=1.2)
	points(pred.mrp.joint.l2svp1[,i],true.yesShare[i,-27]/100, col=rgb(255,0,0,250, maxColorValue=255),pch=2,cex=1.1, lwd=1.5)

	abline(reg=c(0,1), col="grey")
	suppressWarnings(legend(0.03,1.07, legend=c(paste("MrsP  MSE:", round(meanDeviation.marg1,3)),paste("MrP  MSE:", round(meanDeviation.joinL21,3))), pch=c(19,2), pt.lwd=c(1,1.5), cex=c(1.5,1.5), col=c("blue", "red"), bty="n", text.col=c("blue", "red")))
	text(0.08,0.04,paste("Proportional Reduction of Error:", 100*round((meanDeviation.joinL21-meanDeviation.marg1)/meanDeviation.joinL21,3),"%"), pos=4, col="grey40", cex=1.2)
	}
dev.off()

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~



